library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.4 ✓ purrr 0.3.4
✓ tibble 3.1.2 ✓ dplyr 1.0.7
✓ tidyr 1.1.3 ✓ stringr 1.4.0
✓ readr 1.4.0 ✓ forcats 0.5.1
── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(SingleCellExperiment)
Loading required package: SummarizedExperiment
Loading required package: MatrixGenerics
Loading required package: matrixStats
Attaching package: ‘matrixStats’
The following object is masked from ‘package:dplyr’:
count
Attaching package: ‘MatrixGenerics’
The following objects are masked from ‘package:matrixStats’:
colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse, colCounts, colCummaxs, colCummins, colCumprods, colCumsums, colDiffs, colIQRDiffs, colIQRs,
colLogSumExps, colMadDiffs, colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats, colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads, colWeightedMeans, colWeightedMedians, colWeightedSds, colWeightedVars, rowAlls, rowAnyNAs, rowAnys,
rowAvgsPerColSet, rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods, rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps, rowMadDiffs, rowMads,
rowMaxs, rowMeans2, rowMedians, rowMins, rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks, rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs,
rowVars, rowWeightedMads, rowWeightedMeans, rowWeightedMedians, rowWeightedSds, rowWeightedVars
Loading required package: GenomicRanges
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: ‘BiocGenerics’
The following objects are masked from ‘package:parallel’:
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from ‘package:dplyr’:
combine, intersect, setdiff, union
The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect,
is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
table, tapply, union, unique, unsplit, which.max, which.min
Loading required package: S4Vectors
Attaching package: ‘S4Vectors’
The following objects are masked from ‘package:dplyr’:
first, rename
The following object is masked from ‘package:tidyr’:
expand
The following objects are masked from ‘package:base’:
expand.grid, I, unname
Loading required package: IRanges
Attaching package: ‘IRanges’
The following objects are masked from ‘package:dplyr’:
collapse, desc, slice
The following object is masked from ‘package:purrr’:
reduce
Loading required package: GenomeInfoDb
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'.
Attaching package: ‘Biobase’
The following object is masked from ‘package:MatrixGenerics’:
rowMedians
The following objects are masked from ‘package:matrixStats’:
anyMissing, rowMedians
# Make sure that you use the forked version
# devtools::install_github("const-ae/sctransform@offset_with_flexible_theta")
library(sctransform)
# Work around for some bug in zellkonverter
# https://github.com/theislab/zellkonverter/issues/45
setAs("dgRMatrix", to = "dgCMatrix", function(from){
as(as(from, "CsparseMatrix"), "dgCMatrix")
})
if(! file.exists("../data/nih3t3.h5ad")){
download.file("https://data.caltech.edu/tindfiles/serve/a448e98e-89cd-47b3-a134-803bbde29781/", "../data/nih3t3.h5ad")
}
if(! file.exists("../data/hek293t.h5ad")){
download.file("https://data.caltech.edu/tindfiles/serve/b2758046-9247-43ab-b8f0-68882b4f39a3/", "../data/hek293t.h5ad")
}
Helper function, so that the content of the sctransform results stay in order
transform_sctrans_object <- function(sctrans_obj, Y){
fit <- list(overdispersions = rep(NA, nrow(Y)),
PearsonResiduals = array(NA, dim(Y)),
Mu <- array(NA, dim(Y)),
Beta = matrix(NA, nrow = nrow(Y), ncol = 2),
Beta_unreg = matrix(NA, nrow = nrow(Y), ncol = 2),
overdispersions_unreg = rep(NA, nrow(Y)),
gmean = rep(NA, nrow(Y)))
sf <- colSums2(Y)
indices <- match(rownames(sctrans_obj$model_pars_fit), rownames(Y))
indices_unreg <- match(rownames(sctrans_obj$model_pars), rownames(Y))
fit$overdispersions[indices] <- 1/sctrans_obj$model_pars_fit[,"theta"]
fit$PearsonResiduals[indices, ] <- sctrans_obj$y
fit$Beta[indices, ] <- sctrans_obj$model_pars_fit[, c("(Intercept)", "log_umi")]
fit$Beta_unreg[indices_unreg, ] <- sctrans_obj$model_pars[, c("(Intercept)", "log_umi")]
fit$Mu <- exp(fit$Beta %*% t(cbind(1, log10(sf))))
fit$Mu_unreg <- exp(fit$Beta_unreg %*% t(cbind(1, log10(sf))))
fit$overdispersions_unreg[indices_unreg] <- 1 / sctrans_obj$model_pars[, "theta"]
fit$gmean[indices] <- sctrans_obj$gene_attr$gmean
fit
}
Load Data
load_data_fnc <- list(
NIH3T3 = function() zellkonverter::readH5AD("../data/nih3t3.h5ad"),
HEK239T = function() zellkonverter::readH5AD("../data/hek293t.h5ad"),
PBMC4k = function() TENxPBMCData::TENxPBMCData("pbmc4k"),
Mesmer = function() scRNAseq::MessmerESCData(),
Zeisel = function() scRNAseq::ZeiselBrainData(),
Zhen = function() DuoClustering2018::sce_filteredExpr10_Zhengmix4eq()
)
Run sctransform 3 times on each dataset
full_data <- bind_rows(lapply(names(load_data_fnc), function(name){
print(paste0("Loading ", name))
se <- load_data_fnc[[name]]()
rownames(se) <- paste0("Gene_", 1:nrow(se))
colnames(se) <- paste0("Cell_", 1:ncol(se))
# Subsample data to 3000x1000 elements
se_red <- se[sample(seq_len(nrow(se)), min(3000, nrow(se))), sample(seq_len(ncol(se)), min(1000, ncol(se)))]
Y <- as.matrix(assay(se_red))
set.seed(1)
sctrans_intermediate <- vst(Y, method = "poisson")
fit_pois <- transform_sctrans_object(sctrans_intermediate, Y)
# To make sure that I am actually running the correct sctransform version
# I check if the indicator value is set
stopifnot(isTRUE(sctrans_intermediate$is_fork))
set.seed(1)
sctrans_intermediate2 <- vst(Y, method = "offset", theta_given = NULL)
fit_off <- transform_sctrans_object(sctrans_intermediate2, Y)
set.seed(1)
sctrans_intermediate3 <- vst(Y, method = "offset", theta_given = 100)
fit_off_fixed <- transform_sctrans_object(sctrans_intermediate3, Y)
theta_given <- rep(100, nrow(Y))
names(theta_given) <- rownames(Y)
set.seed(1)
sctrans_intermediate4 <- vst(Y, method = "nb_theta_given", theta_given = theta_given)
fit_pois_fixed <- transform_sctrans_object(sctrans_intermediate4, Y)
tibble(Dataset = name,
Poisson = c(fit_pois$PearsonResiduals),
PoissonFixed = c(fit_pois_fixed$PearsonResiduals),
Offset = c(fit_off$PearsonResiduals),
OffsetFixed = c(fit_off_fixed$PearsonResiduals),
ExpressionLevel = c(fit_off$Mu),
y = c(Y))
}))
[1] "Loading NIH3T3"
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 852 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 852 genes, 1000 cells
|
| | 0%
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
snapshotDate(): 2021-05-18
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Found 41 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 852 genes
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 13.36397 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 852 by 1000
Model formula is y ~ log_umi
Found 28 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 852 genes
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 3.772232 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 852 by 1000
Model formula is y ~ log_umi
Second step: Get residuals using fitted parameters for 852 genes
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.6714771 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 852 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 852 genes, 1000 cells
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Second step: Get residuals using fitted parameters for 852 genes
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 4.452082 secs
[1] "Loading HEK239T"
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 927 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 927 genes, 1000 cells
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Found 38 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 927 genes
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 4.839784 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 927 by 1000
Model formula is y ~ log_umi
Found 15 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 927 genes
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 3.911496 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 927 by 1000
Model formula is y ~ log_umi
Second step: Get residuals using fitted parameters for 927 genes
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.665138 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 927 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 927 genes, 1000 cells
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Found 1 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 927 genes
|
| | 0%
|
|======================================================================================== | 50%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 4.321753 secs
[1] "Loading PBMC4k"
snapshotDate(): 2021-05-18
see ?TENxPBMCData and browseVignettes('TENxPBMCData') for documentation
loading from cache
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1127 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1127 genes, 1000 cells
|
| | 0%
|
|=========================================================== | 33%
|
|===================================================================================================================== | 67%
|
|================================================================================================================================================================================| 100%
Found 80 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 1127 genes
|
| | 0%
|
|=========================================================== | 33%
|
|===================================================================================================================== | 67%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 5.376468 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1127 by 1000
Model formula is y ~ log_umi
Found 49 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 1127 genes
|
| | 0%
|
|=========================================================== | 33%
|
|===================================================================================================================== | 67%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 5.140258 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1127 by 1000
Model formula is y ~ log_umi
Second step: Get residuals using fitted parameters for 1127 genes
|
| | 0%
|
|=========================================================== | 33%
|
|===================================================================================================================== | 67%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.7097969 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1127 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1127 genes, 1000 cells
|
| | 0%
|
|=========================================================== | 33%
|
|===================================================================================================================== | 67%
|
|================================================================================================================================================================================| 100%
Second step: Get residuals using fitted parameters for 1127 genes
|
| | 0%
|
|=========================================================== | 33%
|
|===================================================================================================================== | 67%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 5.405725 secs
[1] "Loading Mesmer"
snapshotDate(): 2021-05-18
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
snapshotDate(): 2021-05-18
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
snapshotDate(): 2021-05-18
loading from cache
require("ensembldb")
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1652 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1652 genes, 1000 cells
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
There are 1 estimated thetas smaller than 1e-07 - will be set to 1e-07
Found 27 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 1652 genes
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 5.554952 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1652 by 1000
Model formula is y ~ log_umi
Found 2 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 1652 genes
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 4.278342 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1652 by 1000
Model formula is y ~ log_umi
Second step: Get residuals using fitted parameters for 1652 genes
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.7898989 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1652 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1652 genes, 1000 cells
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Second step: Get residuals using fitted parameters for 1652 genes
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 8.076696 secs
[1] "Loading Zeisel"
snapshotDate(): 2021-05-18
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
snapshotDate(): 2021-05-18
see ?scRNAseq and browseVignettes('scRNAseq') for documentation
loading from cache
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 2391 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 2000 genes, 1000 cells
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
There are 1 estimated thetas smaller than 1e-07 - will be set to 1e-07
Found 4 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 2391 genes
|
| | 0%
|
|=================================== | 20%
|
|====================================================================== | 40%
|
|========================================================================================================== | 60%
|
|============================================================================================================================================= | 80%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 7.448519 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 2391 by 1000
Model formula is y ~ log_umi
Found 3 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 2391 genes
|
| | 0%
|
|=================================== | 20%
|
|====================================================================== | 40%
|
|========================================================================================================== | 60%
|
|============================================================================================================================================= | 80%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 7.491939 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 2391 by 1000
Model formula is y ~ log_umi
Found 2 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 2391 genes
|
| | 0%
|
|=================================== | 20%
|
|====================================================================== | 40%
|
|========================================================================================================== | 60%
|
|============================================================================================================================================= | 80%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.9169271 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 2391 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 2000 genes, 1000 cells
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Found 1 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 2391 genes
|
| | 0%
|
|=================================== | 20%
|
|====================================================================== | 40%
|
|========================================================================================================== | 60%
|
|============================================================================================================================================= | 80%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 9.207349 secs
[1] "Loading Zhen"
see ?DuoClustering2018 and browseVignettes('DuoClustering2018') for documentation
loading from cache
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1556 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1556 genes, 1000 cells
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Found 15 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 1556 genes
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 7.262152 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1556 by 1000
Model formula is y ~ log_umi
Found 18 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 1556 genes
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 7.017199 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1556 by 1000
Model formula is y ~ log_umi
Found 6 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 1556 genes
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 0.7863991 secs
Calculating cell attributes from input UMI matrix: log_umi
Variance stabilizing transformation of count matrix of size 1556 by 1000
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 1556 genes, 1000 cells
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Found 2 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 1556 genes
|
| | 0%
|
|============================================ | 25%
|
|======================================================================================== | 50%
|
|==================================================================================================================================== | 75%
|
|================================================================================================================================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 7.148958 secs
p1 <- full_data %>%
drop_na() %>%
# sample_n(5000) %>%
ggplot(aes(x= Poisson, y = Offset - Poisson)) +
geom_hline(yintercept = 0, color = "#E9E9E9") +
scattermore::geom_scattermore(aes(color = y), pointsize = 2.5, pixels = c(512, 512), alpha = 0.1) +
scale_color_viridis_c(direction = 1, option = "magma", begin = 0.2, end = 0.8,
trans = "log1p", breaks = c(0, 10, 100, 1000),
labels = c(0, 10, 100, ">1000"), limits = c(0, 1001),
oob = scales::oob_squish_any,
minor_breaks = c(0:9, seq(10, 90, by = 10), seq(100, 1000, by = 100))) +
scale_y_continuous(limits = c(-30, 30)) +
facet_wrap(~ Dataset, nrow = 1) +
cowplot::theme_cowplot() +
labs(title = expression(Effect~of~fixing~beta[s]==1),
color = "Y",
x = "Residuals from the default sctransform model",
y = expression(Delta[1]))
p1
p2 <- full_data %>%
drop_na() %>%
# sample_n(5000) %>%
ggplot(aes(x= Poisson, y = PoissonFixed - Poisson)) +
geom_hline(yintercept = 0, color = "#E9E9E9") +
scattermore::geom_scattermore(aes(color = y), pointsize = 2.5, pixels = c(512, 512), alpha = 0.1) +
scale_color_viridis_c(direction = 1, option = "magma", begin = 0.2, end = 0.8,
trans = "log1p", breaks = c(0, 10, 100, 1000),
labels = c(0, 10, 100, ">1000"), limits = c(0, 1001),
oob = scales::oob_squish_any,
minor_breaks = c(0:9, seq(10, 90, by = 10), seq(100, 1000, by = 100))) +
scale_y_continuous(limits = c(-30, 30)) +
facet_wrap(~ Dataset, nrow = 1) +
cowplot::theme_cowplot() +
labs(title = expression(Effect~of~fixing~alpha==0.01),
color = "Y",
x = "Residuals from the default sctransform model",
y = expression(Delta[2]))
p2
p3 <- full_data %>%
drop_na() %>%
# sample_n(5000) %>%
ggplot(aes(x= Poisson, y = OffsetFixed - Poisson)) +
geom_hline(yintercept = 0, color = "#E9E9E9") +
scattermore::geom_scattermore(aes(color = y), pointsize = 2.5, pixels = c(512, 512), alpha = 0.1) +
scale_color_viridis_c(direction = 1, option = "magma", begin = 0.2, end = 0.8,
trans = "log1p", breaks = c(0, 10, 100, 1000),
labels = c(0, 10, 100, ">1000"), limits = c(0, 1001),
oob = scales::oob_squish_any,
minor_breaks = c(0:9, seq(10, 90, by = 10), seq(100, 1000, by = 100))) +
scale_y_continuous(limits = c(-30, 30)) +
facet_wrap(~ Dataset, nrow = 1) +
cowplot::theme_cowplot() +
labs(title = expression(Effect~of~fixing~beta[s]==1~and~alpha==0.01),
color = "Y",
x = "Residuals from the default sctransform model",
y = expression(Delta[3]))
p3
library(patchwork)
res <- (p1 / p2 / p3) +
plot_annotation(tag_levels = "A") + plot_layout(guides = "collect")
res
cowplot::save_plot("../output/sctransform_beta_s_and_alpha_importance.pdf", res,
ncol = 7, nrow = 3, base_asp = 0.6, base_height = 3, device = cairo_pdf)
sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Mojave 10.14.6
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] parallel stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] patchwork_1.1.1 DuoClustering2018_1.10.0 ensembldb_2.16.0 AnnotationFilter_1.16.0 GenomicFeatures_1.44.0 AnnotationDbi_1.54.1
[7] scRNAseq_2.6.1 TENxPBMCData_1.10.0 HDF5Array_1.20.0 rhdf5_2.36.0 DelayedArray_0.18.0 Matrix_1.3-4
[13] sctransform_0.3.2.9002 SingleCellExperiment_1.14.1 SummarizedExperiment_1.22.0 Biobase_2.52.0 GenomicRanges_1.44.0 GenomeInfoDb_1.28.0
[19] IRanges_2.26.0 S4Vectors_0.30.0 BiocGenerics_0.38.0 MatrixGenerics_1.4.0 matrixStats_0.59.0 forcats_0.5.1
[25] stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2
[31] ggplot2_3.3.4 tidyverse_1.3.1
loaded via a namespace (and not attached):
[1] readxl_1.3.1 backports_1.2.1 AnnotationHub_3.0.0 BiocFileCache_2.0.0 plyr_1.8.6 lazyeval_0.2.2
[7] BiocParallel_1.26.0 listenv_0.8.0 scattermore_0.7 digest_0.6.27 htmltools_0.5.1.1 viridis_0.6.1
[13] fansi_0.5.0 magrittr_2.0.1 memoise_2.0.0 globals_0.14.0 Biostrings_2.60.1 modelr_0.1.8
[19] prettyunits_1.1.1 colorspace_2.0-1 blob_1.2.1 rvest_1.0.0 rappdirs_0.3.3 haven_2.4.1
[25] xfun_0.24 crayon_1.4.1 RCurl_1.98-1.3 jsonlite_1.7.2 glue_1.4.2 gtable_0.3.0
[31] zlibbioc_1.38.0 XVector_0.32.0 Rhdf5lib_1.14.1 future.apply_1.7.0 scales_1.1.1 DBI_1.1.1
[37] ggthemes_4.2.4 Rcpp_1.0.6 viridisLite_0.4.0 xtable_1.8-4 progress_1.2.2 reticulate_1.20
[43] mclust_5.4.7 bit_4.0.4 httr_1.4.2 dir.expiry_1.0.0 ellipsis_0.3.2 farver_2.1.0
[49] pkgconfig_2.0.3 XML_3.99-0.6 dbplyr_2.1.1 utf8_1.2.1 labeling_0.4.2 tidyselect_1.1.1
[55] rlang_0.4.11 reshape2_1.4.4 later_1.2.0 munsell_0.5.0 BiocVersion_3.13.1 cellranger_1.1.0
[61] tools_4.1.0 cachem_1.0.5 cli_2.5.0 generics_0.1.0 RSQLite_2.2.7 ExperimentHub_2.0.0
[67] broom_0.7.7 fastmap_1.1.0 yaml_2.2.1 knitr_1.33 bit64_4.0.5 fs_1.5.0
[73] KEGGREST_1.32.0 future_1.21.0 mime_0.10 xml2_1.3.2 biomaRt_2.48.1 compiler_4.1.0
[79] rstudioapi_0.13 filelock_1.0.2 curl_4.3.1 png_0.1-7 interactiveDisplayBase_1.30.0 reprex_2.0.0
[85] stringi_1.6.2 basilisk.utils_1.4.0 lattice_0.20-44 ProtGenerics_1.24.0 vctrs_0.3.8 pillar_1.6.1
[91] lifecycle_1.0.0 rhdf5filters_1.4.0 BiocManager_1.30.16 cowplot_1.1.1 bitops_1.0-7 httpuv_1.6.1
[97] rtracklayer_1.52.0 R6_2.5.0 BiocIO_1.2.0 promises_1.2.0.1 gridExtra_2.3 parallelly_1.26.0
[103] codetools_0.2-18 zellkonverter_1.2.0 MASS_7.3-54 assertthat_0.2.1 rjson_0.2.20 withr_2.4.2
[109] GenomicAlignments_1.28.0 Rsamtools_2.8.0 GenomeInfoDbData_1.2.6 hms_1.1.0 grid_4.1.0 basilisk_1.4.0
[115] shiny_1.6.0 lubridate_1.7.10 restfulr_0.0.13